# Web Scrape
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
# Exploratory Data Analysis
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make 'notebook' plotly's default renderer (the one meant for display inside a Jupyter notebook).
pio.renderers.default='notebook'
# Download the House directory page. The timeout stops the request from
# hanging forever on a stalled connection, and raise_for_status() prevents an
# HTTP error page from being parsed as if it were the directory.
response = requests.get("https://www.house.gov/representatives", timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
tables = soup.select('table')

# Column headings in the order their cells occur within each table row.
columns = ['District', 'Name', 'Party', 'Office Room', 'Phone', 'Committee Assignment']

rows = []
# Only the first 56 tables are read -- presumably the per-state/territory
# listings; TODO confirm against the current page layout.
for table in tables[:56]:
    # Extract and strip every cell once, then de-interleave the flat list:
    # cell i belongs to column i % 6.
    cells = [x.text.strip() for x in table.select('td')]
    row = {column: cells[offset::len(columns)] for offset, column in enumerate(columns)}
    # The table's caption text is stored as the State value for all of its rows.
    row['State'] = table.select_one('caption').text.strip()
    rows.append(pd.DataFrame(row))

# Stack the per-table frames; each keeps its own 0-based index.
data = pd.concat(rows)
data
| | District | Name | Party | Office Room | Phone | Committee Assignment | State |
|---|---|---|---|---|---|---|---|
| 0 | 1st | Carl, Jerry L. | R | 1330 LHOB | (202) 225-4931 | Armed ServicesNatural Resources | Alabama |
| 1 | 2nd | Moore, Barry | R | 1504 LHOB | (202) 225-2901 | AgricultureVeterans' Affairs | Alabama |
| 2 | 3rd | Rogers, Mike | R | 2469 RHOB | (202) 225-3261 | Armed Services | Alabama |
| 3 | 4th | Aderholt, Robert | R | 266 CHOB | (202) 225-4876 | Appropriations | Alabama |
| 4 | 5th | Brooks, Mo | R | 2185 RHOB | (202) 225-4801 | Armed ServicesScience, Space, and Technology | Alabama |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 4 | 5th | Fitzgerald, Scott | R | 1507 LHOB | (202) 225-5101 | Education and LaborJudiciarySmall Business | Wisconsin |
| 5 | 6th | Grothman, Glenn | R | 1427 LHOB | (202) 225-2476 | BudgetEducation and LaborOversight and Reform | Wisconsin |
| 6 | 7th | Tiffany, Thomas P. | R | 1719 LHOB | (202) 225-3365 | Natural ResourcesJudiciary | Wisconsin |
| 7 | 8th | Gallagher, Mike | R | 1230 LHOB | (202) 225-5665 | Armed ServicesIntelligenceTransportation and I... | Wisconsin |
| 0 | At Large | Cheney, Liz | R | 416 CHOB | (202) 225-2311 | Armed ServicesSelect Committee to Investigate ... | Wyoming |
441 rows × 7 columns
# Stamp the file name with the current local time (day-month-year, then
# hour/minute/second) so that runs at different times get different files.
timestamp = datetime.now().strftime("%d-%m-%Y %H%M%S")
file_path = f'../Representatives - Output/Representatives {timestamp}.csv'

# Write the scraped data frame to that location as CSV.
data.to_csv(file_path)
# Bar chart of representatives per state, coloured by party.
fig = px.bar(data, x='State', color='Party', color_discrete_map={'R': 'red','D': 'blue'})
# Order the states on the x axis by their bar totals, largest first.
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.update_xaxes(tickangle=-45)
# Legend labels for the party codes. Any code without an entry here keeps its
# raw value instead of raising KeyError, so an unexpected party code in the
# scraped data cannot break the chart.
newnames = {'R':'Republicans','D':'Democrats'}
fig.for_each_trace(lambda t: t.update(name=newnames.get(t.name, t.name)))
# Bar chart of Republican representatives per state.
republicans = data[data['Party'] == 'R']
fig = px.bar(
    republicans,
    x='State',
    color='Party',
    color_discrete_map={'R': 'red'},
)
# Order states by bar total (largest first) and slant the tick labels.
fig.update_layout(xaxis={'categoryorder':'total descending'}).update_xaxes(tickangle=-45)
# Swap the party code for a readable legend label.
newnames = {'R':'Republicans'}
fig.for_each_trace(lambda trace: trace.update(name=newnames[trace.name]))
# Bar chart of Democratic representatives per state.
democrats = data[data['Party'] == 'D']
fig = px.bar(
    democrats,
    x='State',
    color='Party',
    color_discrete_map={'D': 'Blue'},
)
# Order states by bar total (largest first) and slant the tick labels.
fig.update_layout(xaxis={'categoryorder':'total descending'}).update_xaxes(tickangle=-45)
# Swap the party code for a readable legend label.
newnames = {'D':'Democrats'}
fig.for_each_trace(lambda trace: trace.update(name=newnames[trace.name]))
# Pie chart of the Party column, one slice per party code.
party_colors = {'R':'Red','D':'Blue'}
fig = px.pie(data, names='Party', color='Party', color_discrete_map=party_colors)
# Set the font size of the text drawn on the slices to 22.
fig.update_traces(textfont_size=22)
# Count representatives per party, using full party names instead of codes.
newnames = {'R':'Republicans','D':'Democrats'}
party_counts = data['Party'].replace(newnames).value_counts()
# Turn the counts into a two-column table: Party, Representatives.
party_counts.rename_axis('Party').reset_index(name='Representatives')
| | Party | Representatives |
|---|---|---|
| 0 | Democrats | 226 |
| 1 | Republicans | 215 |
# Bar chart of representatives per district label, coloured by party.
fig = px.bar(data, x='District', color='Party', color_discrete_map={'D': 'Blue', 'R':'Red'})
# Order the district labels on the x axis by their bar totals, largest first.
fig.update_layout(xaxis={'categoryorder':'total descending'})
# Legend labels for the party codes. Any code without an entry here keeps its
# raw value instead of raising KeyError, so an unexpected party code in the
# scraped data cannot break the chart.
newnames = {'R':'Republicans','D':'Democrats'}
fig.for_each_trace(lambda t: t.update(name=newnames.get(t.name, t.name)))
fig.update_xaxes(tickangle=-45)